Google 圖片
Google 圖片
Data Scientist: The Sexiest Job of the 21st Century.
http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram
R (a domain-specific language for data science) ranks 6th. Other data-oriented languages also appear in the Top 50 rankings, including Matlab (#15), SQL (#23), Julia (#31) and SAS (#37).
"The way R works is pretty straightforward: you apply functions to objects." — Greg Martain
# Install (one-time) and load the gapminder example dataset.
install.packages("gapminder")
library(gapminder)
data("gapminder")

# Inspect the data frame: dimensions, per-column summary, and structure.
dim(gapminder)
summary(gapminder)
str(gapminder)

# Install dplyr (one-time) for the data-manipulation examples.
install.packages("dplyr")
library(dplyr)

# Mean life expectancy per country, restricted to Taiwan and South Africa.
gapminder %>%
  filter(country %in% c("Taiwan", "South Africa")) %>%
  group_by(country) %>%
  summarise(avg_lifeExp = mean(lifeExp))

# t.test()
# Keep the same two countries and run a two-sample t-test of lifeExp
# grouped by country.
df_ttest <- gapminder %>%
  filter(country %in% c("Taiwan", "South Africa"))
t.test(lifeExp ~ country, data = df_ttest)

# Install ggplot2 (one-time) for the plotting examples.
install.packages("ggplot2")
library(ggplot2)

# gg1: scatter plot of life expectancy against GDP per capita, keeping only
# rows with gdpPercap below 50000.
gg1 <- gapminder %>%
  filter(gdpPercap < 50000) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
gg1

# gg2: same plot, points colored by continent and made semi-transparent.
gg2 <- gapminder %>%
  filter(gdpPercap < 50000) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, col = continent)) +
  geom_point(alpha = 0.3)
gg2

# gg3: adds a smoothed trend line per continent (default smoothing method).
gg3 <- gapminder %>%
  filter(gdpPercap < 50000) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, col = continent)) +
  geom_point(alpha = 0.3) +
  geom_smooth()
gg3
## `geom_smooth()` using method = 'loess'
# gg4: linear-model trend lines, with one facet panel per continent.
gg4 <- gapminder %>%
  filter(gdpPercap < 50000) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, col = continent)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm") +
  facet_wrap(~continent)
gg4

# Slide notes on data frames:
# - The data.frame() function creates a data frame (the vectors must have
#   the same length).
# - dim(), summary(), str(), names(), head(), tail()
# - Use [[]] to select elements.
# - Use $ to select elements.
#
# https://storage.googleapis.com/learn-r-the-easy-way.appspot.com/udemy_courses/data_import.zip
# The read.csv() function
csv_file_path <- "Your csv file path"
df <- read.csv(csv_file_path)

# The read.table() function
txt_file_path <- "Your text file path"
df <- read.table(txt_file_path, sep = "Text file separator", header = TRUE)

# The readxl::read_excel() function
install.packages("readxl")
library(readxl)
xlsx_file_path <- "Your excel file path"
df <- read_excel(xlsx_file_path)

# The jsonlite::fromJSON() function
install.packages("jsonlite")
library(jsonlite)
json_file_path <- "Your json file path"
data_list <- fromJSON(json_file_path)

# The rvest package
# rvest to the rescue!
# rvest
install.packages("rvest")
library(rvest)
## Loading required package: xml2

# read_html() handles the request.
html_doc <- "http://www.imdb.com/title/tt3783958/" %>%
  read_html()

# html_nodes() handles the parsing.
elem <- html_doc %>%
  html_nodes(css = "strong span")
# html_nodes(xpath = "//strong/span")

# html_text() strips the tags; the remaining text is converted to a number.
rating <- elem %>%
  html_text() %>%
  as.numeric()

# Slide notes:
# - The rvest and jsonlite packages fetch financial-report data from the web.
# - %>% handles filtering the data.
# - The ggplot2 package handles plotting.
# - Make the human judgment.
install.packages(c("rvest", "jsonlite", "ggplot2", "magrittr"))
library(rvest)
library(jsonlite)
library(ggplot2)
library(magrittr)